April 24th 2018
What is a choropleth?
Where to get (legally) regions shapefiles?
Drawing maps with ggplot in R
require(rgdal) #function readOGR require(rgeos) #function gSimplify and spTransform library(ggplot2) #function fortify
EU_NUTS <- readOGR("data/ref-nuts-2013-03m/", layer = "NUTS_RG_03M_2013_3035",
encoding = "utf8")
## OGR data source with driver: ESRI Shapefile ## Source: "/Users/piotr/szychta_w_danych/prezentacja_brug_18_04_24/data/ref-nuts-2013-03m", layer: "NUTS_RG_03M_2013_3035" ## with 1951 features ## It has 5 fields
#' we need to save the id, since we loose it when transforming into data.frame
EU_NUTS_codes <- data.frame(id = as.character(0:1950),
nuts_id = EU_NUTS@data$NUTS_ID,
stringsAsFactors = FALSE)
#' by simplifying we reduce file size and drawing time
EU_NUTS_simplified <- gSimplify(EU_NUTS, tol = 1000, topologyPreserve = TRUE)
#' changin to mercator (convienent to include eg. localization from google maps)
EU_NUTS_fortified <- spTransform(EU_NUTS_simplified, CRS("+proj=longlat +datum=WGS84"))
EU_NUTS_fortified <- fortify(EU_NUTS_fortified)
#' computing centroids, useful for making labels
centroids_countries = data.frame(gCentroid(EU_NUTS, byid = TRUE))
centroids_countries$id <- rownames((centroids_countries))
glimpse(EU_NUTS_fortified)
## Observations: 349,730 ## Variables: 7 ## $ long <dbl> 22.99716, 23.05680, 23.26778, 23.43325, 23.51521, 23.581... ## $ lat <dbl> 43.80763, 43.79581, 43.84681, 43.85057, 43.83107, 43.797... ## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1... ## $ hole <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, ... ## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,... ## $ id <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "... ## $ group <fct> 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0...
library(eurostat)
gdp_data <- get_eurostat('tec00001', time_format = "num", stringsAsFactors = F)
## Table tec00001 cached at /var/folders/bb/v3bv876j4nj7sk3ycjyqvhqm0000gn/T//RtmpzSrc9H/eurostat/tec00001_num_code_FF.rds
glimpse(gdp_data)
## Observations: 1,371 ## Variables: 5 ## $ na_item <chr> "B1GQ", "B1GQ", "B1GQ", "B1GQ", "B1GQ", "B1GQ", "B1GQ"... ## $ unit <chr> "CP_EUR_HAB", "CP_EUR_HAB", "CP_EUR_HAB", "CP_EUR_HAB"... ## $ geo <chr> "AL", "AT", "BE", "BG", "CH", "CY", "CZ", "DE", "DK", ... ## $ time <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, ... ## $ values <dbl> 2400, 32400, 31000, 3500, 45600, 21700, 12100, 29500, ...
gdp_data <- gdp_data %>% filter(unit == 'CP_EUR_HAB', nchar(as.character(geo)) == 2) %>% mutate(unit = factor(unit)) gdp_data_labels <- label_eurostat(gdp_data, fix_duplicated = T) glimpse(gdp_data_labels)
## Observations: 418 ## Variables: 5 ## $ na_item <chr> "Gross domestic product at market prices", "Gross dome... ## $ unit <fct> Current prices, euro per capita, Current prices, euro ... ## $ geo <chr> "Albania", "Austria", "Belgium", "Bulgaria", "Switzerl... ## $ time <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, ... ## $ values <dbl> 2400, 32400, 31000, 3500, 45600, 21700, 12100, 29500, ...
plot_data <- EU_NUTS_fortified %>%
inner_join(EU_NUTS_codes,
by = c("id")) %>%
inner_join(gdp_data,
by = c('nuts_id'='geo'))
glimpse(plot_data)
## Observations: 558,275 ## Variables: 12 ## $ long <dbl> 22.99716, 22.99716, 22.99716, 22.99716, 22.99716, 22.9... ## $ lat <dbl> 43.80763, 43.80763, 43.80763, 43.80763, 43.80763, 43.8... ## $ order <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, ... ## $ hole <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE... ## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... ## $ id <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",... ## $ group <fct> 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,... ## $ nuts_id <chr> "BG", "BG", "BG", "BG", "BG", "BG", "BG", "BG", "BG", ... ## $ na_item <chr> "B1GQ", "B1GQ", "B1GQ", "B1GQ", "B1GQ", "B1GQ", "B1GQ"... ## $ unit <fct> CP_EUR_HAB, CP_EUR_HAB, CP_EUR_HAB, CP_EUR_HAB, CP_EUR... ## $ time <dbl> 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, ... ## $ values <dbl> 3500, 4200, 4900, 4900, 5100, 5600, 5700, 5800, 5900, ...
plot_data %>%
filter(time == 2016) %>%
ggplot(mapping = aes(x = long, y = lat)) +
geom_polygon(mapping = aes(group = group, fill = values))
plot_data %>%
filter(time == 2016) %>%
ggplot(mapping = aes(x = long, y = lat)) +
scale_y_continuous(limits = c(33, 72)) +
scale_x_continuous(limits = c(-25, 32)) +
#drawing ploygons
geom_polygon(mapping = aes(group = group, fill = values)) +
#set coord system
coord_map() +
#set colors
scale_fill_distiller("GDP", palette = "YlGn",
breaks = pretty_breaks(n = 6),
trans = "reverse") +
guides(fill = guide_legend(reverse = TRUE)) +
labs(title = "GDP per capita",
caption = "Based on eurostat data") +
theme_map(base_size = 18)
breaks_gdp = c(0, 10000, 20000, 30000, 40000,50000, 60000, 150000)
labels_gdp = c('less than 10K', '10-20K', '20-30K', '30-40K', '40-50K', '50-60K', '>60K')
plot_data %>%
mutate(values = cut(values, breaks_gdp, labels = labels_gdp)) %>%
filter(time %in% c(2006, 2016)) %>%
ggplot(mapping = aes(x = long, y = lat)) +
scale_y_continuous(limits = c(33, 72)) +
scale_x_continuous(limits = c(-25, 32)) +
geom_polygon(mapping = aes(group = group, fill = values)) + #draw polygons
coord_map() + #set coord system
scale_fill_brewer("GDP", palette = "RdYlGn") +
facet_wrap(~time, nrow = 1, ncol = 2) +
guides(fill = guide_legend(reverse = TRUE)) +
labs(title = "GDP per capita",
caption = "Based on eurostat data") +
theme_map(base_size = 30, base_family = 'Helvetica Neue Light') +
theme(title = element_text(hjust = 0.5),
legend.position="right",
legend.key.height = unit(1, "cm"),
legend.key.width = unit(0.7, "cm"))
library(Rcartogram)
library(getcartr)
gdp_2016 <- filter(gdp_data, time == 2016)
EU_NUTS_countries <- EU_NUTS[EU_NUTS$LEVL_CODE == 0 & EU_NUTS$NUTS_ID %in% gdp_2016$geo,]
EU_NUTS_countries@data <- EU_NUTS_countries@data %>%
inner_join(gdp_2016, by = c('NUTS_ID'='geo'))
## Warning: Column `NUTS_ID`/`geo` joining factor and character vector, ## coercing into character vector
EU_NUTS_cartogram <- quick.carto(spdf = EU_NUTS_countries, v = EU_NUTS_countries@data$values)
plot_data2 <- inner_join(fortify(EU_NUTS_cartogram), EU_NUTS_codes,
by = c("id")) %>%
inner_join(gdp_2016, by = c('nuts_id'='geo'))